import json
import pymysql
import requests
from datetime import datetime
from urllib.parse import urljoin
from lxml import html
from bs4 import BeautifulSoup

# 特免证明出具数量
BASE_URL = 'http://cmp.msa.gov.cn/crew_mgt/ShowReport.wx?DISPLAY_TYPE=1&PAGEID=report_exemption_query&report1_ALLDATASETS_RECORDCOUNT=report1__default_default_default_key__default_default_default_key%3D1%3B&refreshComponentGuid=report_exemption_query_guid_report1&txtappltime1=2024-10-01&txtappltime2=2024-12-31&SEARCHREPORT_ID=report1&WX_ISAJAXLOAD=true'
headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json;charset=utf-8'
}
cookies = {
    # OAMAuthnCookie_cmp.msa.gov.cn:80 参数用于登录认证
    'OAMAuthnCookie_cmp.msa.gov.cn:80': '%2BYwHhhdTjpzkLVHs2j9oasYwRxIDXtY4CBrXNMHoy1dv4nGH2gZM4hqYykfWK6M20thTdlQdJaiDmCjqr7XN%2F2aQ%2Bfm7NpQqrN127i5ggE6GLEdM2jK6aU%2BFYsuMdT7ebaP5ZwMtye91%2FGPQ17%2FpHCQrta21nsgu6mBOfG69kYHTXwUiDAe1z4cJ0zA7bkt6ojrTUWBNWTgj3LRZ1tVgzL1WQbRPAWaeJnbmYh7c%2FMehbK%2BBEgjVKMyN8k8dEro4bWvlocbBWIkHKozb1p6CkRGCtNDJw1LaOrktNZPe%2B%2FtrnMGxHMHOWyJZ3FA3Nqrd3yrPFR1b073wiwytHhoEKF9R8Y7WuWOq8XyN%2B6ghym1RxXPdp%2Fs9dCz4%2FTwMmQInVaDQnhXWaJ%2BSsD7t1bslTXH5Py%2FOlA5A48S2lCYW8lY0wwEew8R9FoDMBYRSqi5FnxDhp%2Bl5O4noRsAQnh2FLBTsCYKUFTJEAFHN4sDD3dU%3D',

    # OAMAuthnHintCookie和JSESSIONID 参数用于声明返回接口数据，不配置的会一直返回HTML
    'OAMAuthnHintCookie': '1',
    'JSESSIONID': '4i9ud1j0VL5O_3ZF3ytPiqULPagxLX6-iWZ7uEyn5MX7chvYc_vX!49340319'
}

# Columns of xt_mariner_report_exemption_query that receive the scraped cells,
# in the order they are inserted.
TARGET_COLUMNS = (
    'serial_number', 'user_name', 'id_number', 'bid_number', 'declarant',
    'bid_org', 'bid_date', 'status', 'transactor',
)


def _extract_report_rows(page_html, expected_cells):
    """Return one list of cell texts per data row of the report table.

    Args:
        page_html: HTML text returned by the report endpoint.
        expected_cells: Number of cells a row must have to be kept.

    Returns:
        A list of rows; each row is a list of ``expected_cells`` stripped
        strings. Empty when the report table is not present in the page.
    """
    soup = BeautifulSoup(page_html, 'lxml')
    # Only the report's data table is of interest, not every table on the page.
    tables = soup.find_all('table', class_="table table-hover table-striped cls-data-table")
    if not tables:
        return []

    report_table = tables[0]
    # Group cells per <tr> so every table row becomes one database record.
    # If the markup has no <tr> at all, treat the whole table as a single row.
    row_nodes = report_table.find_all('tr') or [report_table]

    records = []
    for row_node in row_nodes:
        cells = row_node.find_all('td')
        if not cells:
            # Rows without <td> cells carry no data.
            continue
        values = []
        for cell in cells:
            # The value sits in the first <div> of the cell; fall back to the
            # cell's own text when there is no <div>.
            holder = cell.find('div')
            values.append((holder if holder is not None else cell).text.strip())
        if len(values) != expected_cells:
            print(f"skipping row with {len(values)} cells (expected {expected_cells}): {values}")
            continue
        records.append(values)
    return records


session = requests.Session()
# A timeout keeps the script from hanging forever when the server never answers.
response_lo = session.post(BASE_URL, headers=headers, cookies=cookies, timeout=30)
html_content = response_lo.text
print(html_content)
if response_lo.status_code != 200:
    print(f"request failed with HTTP status {response_lo.status_code}; nothing inserted")
else:
    data_rows = _extract_report_rows(html_content, len(TARGET_COLUMNS))
    print(data_rows)
    if not data_rows:
        print("no report rows found in the response; nothing inserted")
    else:
        # Database connection settings.
        # NOTE(review): the credentials are hard-coded in source — consider
        # reading them from the environment or a config file kept out of VCS.
        db_config = {
            'host': '191.254.8.220',
            'user': 'root',
            'password': 'Szmsa@520',
            'db': 'spiders',
            'charset': 'utf8mb4',
            'cursorclass': pymysql.cursors.DictCursor
        }
        connection = pymysql.connect(**db_config)

        try:
            with connection.cursor() as cursor:
                # One %s placeholder per target column; values are bound by the
                # driver, never interpolated into the SQL text.
                placeholders = ", ".join(['%s'] * len(TARGET_COLUMNS))
                sql = f"INSERT INTO xt_mariner_report_exemption_query ({','.join(TARGET_COLUMNS)}) VALUES ({placeholders})"
                # Use the cursor managed by the with-block so it gets closed.
                cursor.executemany(sql, data_rows)
            connection.commit()
        except Exception:
            # Leave no partial batch behind when any insert fails.
            connection.rollback()
            raise
        finally:
            connection.close()
